Matplotlib学习

学习 Matplotlib(以及 pandas、seaborn 绘图)过程中整理的一些笔记。


Matplotlib

1
import matplotlib.pyplot as plt
1
import numpy as np
1
from numpy.random import randn
1
import pandas as pd
1
from datetime import datetime
1
%config InlineBackend.figure_format = 'svg'
1
2
3
4
5
6
7
# Start from an empty figure; every plot below lands on one of its subplots.
fig = plt.figure()

# Claim the first three slots of a 2x2 grid; the fourth slot is never created.
ax1, ax2, ax3 = (fig.add_subplot(2, 2, slot) for slot in (1, 2, 3))

# plt.plot draws on the most recently used subplot, which is ax3 at this point.
plt.plot(np.random.randn(50).cumsum(), 'k--')

# Calling a method on a specific Axes object draws into that subplot.
ax1.hist(np.random.randn(100), alpha=0.3, color='k', bins=20)
positions = np.arange(30)
ax2.scatter(positions, positions + 3 * np.random.randn(30))
<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x1c640d9d520>
1
ax1?
1
# 直接调用 AxesSubplot 对象的实例方法(例如上面的 ax1.hist、ax2.scatter),就可以在对应的空子图内画图
1
fig, axes = plt.subplots(2,3)
<IPython.core.display.Javascript object>

1
axes[0,1]
<matplotlib.axes._subplots.AxesSubplot at 0x1c640dfa8e0>
1
axes
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001C640DCF940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001C640DFA8E0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001C640E23D30>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000001C640E57190>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001C640E7E5B0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000001C640EA8A90>]],
      dtype=object)
1
axes[0,1].hist(np.random.randn(100),bins=20,color='k',alpha=0.3)
(array([ 3.,  0.,  4.,  4.,  3.,  8.,  9., 14.,  7., 11.,  7.,  5.,  5.,
         6.,  7.,  2.,  0.,  4.,  0.,  1.]),
 array([-2.49044894, -2.21652965, -1.94261035, -1.66869105, -1.39477176,
        -1.12085246, -0.84693317, -0.57301387, -0.29909457, -0.02517528,
         0.24874402,  0.52266332,  0.79658261,  1.07050191,  1.34442121,
         1.6183405 ,  1.8922598 ,  2.16617909,  2.44009839,  2.71401769,
         2.98793698]),
 <a list of 20 Patch objects>)
1
2
3
4
5
# Build a 2x2 grid of histograms whose panels share both the x and y axes.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        # Each panel gets its own sample of 500 standard-normal draws.
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
# Remove all spacing between the panels. The axis labels now overlap and
# have to be adjusted by hand.
plt.subplots_adjust(wspace=0.0, hspace=0)
<IPython.core.display.Javascript object>

1
2
plt.plot(randn(30).cumsum(),'ko--') # the 'o' in the format string adds a marker at every data point
# The same styling spelled out with explicit keyword arguments:
# plt.plot(randn(30).cumsum(),color='k',linestyle='dashed',marker='o')
[<matplotlib.lines.Line2D at 0x1c640f12c10>]
1
data = np.random.randn(30).cumsum()
1
2
3
4
# Draw the same series twice so the two interpolation modes can be compared.
# First with the default behaviour (points joined by straight lines) ...
plt.plot(data, 'ko--', label='Default')
# ... then with a different drawstyle requested explicitly.
plt.plot(data, 'ko-', label='steps-post', drawstyle='steps-post')
# Add the legend built from the labels above; loc controls where it is placed.
plt.legend(loc='best')
<matplotlib.legend.Legend at 0x1c640ee2460>
1
2
3
4
5
6
7
8
9
# Plot a 1000-step random walk on a single full-figure subplot.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(np.random.randn(1000).cumsum())

# Place the x ticks at fixed data positions ...
tick_positions = [0, 250, 500, 750, 1000]
ticks = ax.set_xticks(tick_positions)

# ... and replace the numeric tick text with names, rotated and in a small font.
tick_names = ['one', 'two', 'three', 'four', 'five']
labels = ax.set_xticklabels(tick_names, fontsize='small', rotation=30)

# Title for the subplot and a label for the x axis.
ax.set_title('this is the title')
ax.set_xlabel('Stages')
<IPython.core.display.Javascript object>

Text(0.5, 0, 'Stages')
1
2
3
4
5
6
7
# The y axis is customised in the same way as the x axis.
# The axes class also has a batch setter, `set`, which applies several
# properties in one call:
# props = {
#     'title': 'this is a title',
#     'xlabel': 'Stages'
# }
# ax.set(**props)
1
2
# Load the price table, using the first column as the index and parsing it as dates.
data = pd.read_csv(
    'examples/spx.csv',
    parse_dates=True,
    index_col=0,
)
# Keep the 'SPX' column on its own as a Series.
spx = data['SPX']
1
data.head()
SPX
1990-02-01 328.79
1990-02-02 330.92
1990-02-05 331.85
1990-02-06 329.66
1990-02-07 333.75
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Plot the SPX series and annotate three dated events on it.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# ax=ax makes pandas draw on the axes created above instead of a new one.
spx.plot(ax=ax, style='k-')

# (date, caption) pairs for the events to mark.
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy'),
]

for date, label in crisis_data:
    # asof returns the last available value at or before the date, so the
    # lookup also works for dates that are missing from the index.
    price = spx.asof(date)
    # The arrow tip sits 75 above the curve and the caption 225 above it.
    ax.annotate(label, xy=(date, price + 75),
                xytext=(date, price + 225),
                arrowprops=dict(facecolor='red', headwidth=4,
                                width=2, headlength=4),
                horizontalalignment='left', verticalalignment='top')

# zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])
<IPython.core.display.Javascript object>

(600.0, 1800.0)
1
ax.annotate?
1
2
3
4
5
6
7
8
9
10
11
12
# Draw three shape patches and a line on one subplot, then save the figure.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Build the shapes first; they only appear once they are added to an axes.
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon(
    [[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
    color='g',
    alpha=0.5,
)
for patch in (rect, circ, pgon):
    ax.add_patch(patch)

ax.plot(np.arange(10))
# Write the current figure to disk at 400 dpi.
plt.savefig('figpath.png', dpi=400, bbox_inches='tight')
<IPython.core.display.Javascript object>

1
# 使用 plt.rc 方法可以设置全局图像参数,例如 plt.rc('figure', figsize=(10, 10)) 会修改默认的图像大小

pandas和seaborn绘图

1
2
# A 10-step random walk indexed 0, 10, ..., 90, drawn with the Series' own plot method.
s = pd.Series(
    data=np.random.randn(10).cumsum(),
    index=np.arange(0, 100, 10),
)
s.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1c642297f70>
1
import seaborn
1
2
3
4
# Four random walks (cumulative sum down the rows) in columns A-D.
# DataFrame.plot draws every column on the same axes.
df = pd.DataFrame(
    np.random.randn(10, 4).cumsum(axis=0),
    index=np.arange(0, 100, 10),
    columns=list('ABCD'),
)
df.plot()
<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1c643132940>
1
df
A B C D
0 0.696818 -0.030235 -1.238042 -0.341480
10 0.862505 1.359178 0.549379 -1.291231
20 2.441827 1.310247 0.420022 -1.128168
30 2.387091 1.241566 -0.581826 -1.820578
40 3.251112 3.077266 -1.357327 -0.678339
50 1.563625 3.156460 -1.013992 1.069525
60 2.248198 3.671768 -0.718612 1.468115
70 2.414084 4.055347 0.580596 0.772886
80 1.096795 2.999826 -0.351196 0.254367
90 1.284084 3.652915 0.893795 1.381083
1
np.random.randn(10,4)
array([[-0.84682872, -2.23246446, -0.16477919,  0.39587882],
       [-0.35059225,  0.37097765,  0.94302706,  2.37992599],
       [ 1.64884967,  0.39846685, -1.55784035, -1.80192865],
       [ 3.12193368, -2.04909692,  0.02496328,  0.70957882],
       [ 0.05404492,  0.92240168, -0.27077722, -0.78232288],
       [ 0.65882012,  0.41724622,  2.58481926,  0.17606479],
       [ 0.15200091,  0.05766286, -1.49603979,  0.29270685],
       [ 0.58972803, -0.22044794, -0.36084333, -0.85146362],
       [ 0.14251254,  1.16289523, -1.15961946,  0.57397194],
       [ 0.05098954, -0.74228059,  0.40961916, -0.55758266]])
1
aa = np.random.randn(10,4)
1
aa.dtype
dtype('float64')
1
aa.shape
(10, 4)
1
aa.cumsum(0)
array([[-1.90723843e-01, -1.61528518e-03,  6.58199829e-01,
         1.50291186e-01],
       [ 1.34178302e-01,  2.36388466e+00,  1.25315110e+00,
         1.57643797e+00],
       [ 2.86874782e+00,  1.03346083e-01,  4.23573715e-02,
         1.66372988e+00],
       [ 3.50929845e+00,  1.65620984e+00, -1.10335802e+00,
         2.41393099e+00],
       [ 3.89687412e+00, -3.70453502e-01, -2.11251736e+00,
         2.17083809e+00],
       [ 4.93215058e+00, -1.92307370e+00, -3.25565961e+00,
         3.04624758e+00],
       [ 5.33578734e+00, -1.45887156e+00, -1.55449481e+00,
         1.24115143e+00],
       [ 4.52885738e+00, -1.34273393e+00,  2.20088228e-01,
         1.53559233e+00],
       [ 4.07925847e+00, -1.87034782e+00, -5.34155029e-01,
         2.82583291e-02],
       [ 2.28509532e+00, -5.76542176e-01, -9.48583419e-01,
        -1.17789105e+00]])
1
aa.shape
(10, 4)
1
2
3
# Ten rows of standard-normal noise in four named columns, indexed 0, 10, ..., 90.
aa = pd.DataFrame(
    data=np.random.randn(10, 4),
    index=np.arange(0, 100, 10),
    columns=list('ABCD'),
)
1
aa
A B C D
0 -0.079080 -1.147150 0.263920 0.115854
10 -0.545627 0.807672 -0.045676 0.506566
20 -0.892713 1.275014 1.315923 0.780239
30 -0.015181 -1.116448 0.586800 -0.959856
40 -2.009985 0.259606 0.331871 -0.331064
50 -0.896872 -0.593908 -0.613424 0.219509
60 -0.489125 -0.573446 -0.667223 -0.885073
70 -1.208364 0.455952 -0.564741 -0.696456
80 1.066969 -0.769753 -0.184308 0.246669
90 0.701466 -0.638024 0.478407 -2.613421
1
aa.cumsum(0)
A B C D
0 -0.079080 -1.147150 0.263920 0.115854
10 -0.624707 -0.339478 0.218244 0.622420
20 -1.517420 0.935536 1.534167 1.402659
30 -1.532601 -0.180912 2.120967 0.442803
40 -3.542587 0.078694 2.452837 0.111739
50 -4.439459 -0.515214 1.839413 0.331248
60 -4.928584 -1.088660 1.172190 -0.553824
70 -6.136948 -0.632708 0.607450 -1.250281
80 -5.069979 -1.402461 0.423142 -1.003612
90 -4.368513 -2.040485 0.901549 -3.617033
1
# One subplot per column, all sharing the same y axis.
# Both options must be real booleans: the strings 'True' only worked because
# non-empty strings are truthy, and pandas versions that accept an iterable
# for `subplots` reject a plain string.
aa.plot(subplots=True, sharey=True)
<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x000001C643196CD0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001C6431B2D30>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001C6431D4D30>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x000001C643204BE0>],
      dtype=object)

柱状图

1
2
3
4
5
6
7
# Two stacked subplots: vertical bars on top, horizontal bars underneath.
fig, axes = plt.subplots(2, 1)

# Sixteen uniform random values labelled 'a' through 'p'.
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))

top_ax, bottom_ax = axes
data.plot.bar(ax=top_ax, alpha=0.7, color='k')
data.plot.barh(ax=bottom_ax, alpha=0.7, color='b')

# Write the current figure to disk at 400 dpi.
plt.savefig('figpath.png', dpi=400, bbox_inches='tight')
<IPython.core.display.Javascript object>

1
data
a    0.822456
b    0.596718
c    0.421539
d    0.238880
e    0.168473
f    0.426530
g    0.006231
h    0.261739
i    0.996465
j    0.414337
k    0.120072
l    0.784552
m    0.065491
n    0.603606
o    0.323153
p    0.713940
dtype: float64
1
2
3
# Six labelled rows of uniform random values; the column index is named 'Genus'.
genus_columns = pd.Index(list('ABCD'), name='Genus')
df = pd.DataFrame(
    data=np.random.rand(6, 4),
    columns=genus_columns,
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
1
df
Genus A B C D
one 0.998130 0.967000 0.261125 0.596219
two 0.133458 0.862444 0.577590 0.210380
three 0.744260 0.807075 0.563984 0.316443
four 0.074450 0.407504 0.126495 0.892630
five 0.167306 0.712106 0.587138 0.243097
six 0.393234 0.579052 0.502465 0.364447
1
df.plot.bar()
<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1c644681280>
1
df.plot.barh(stacked = True, alpha=0.5)
<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1c6433735e0>
1
tips = pd.read_csv('examples/tips.csv')
1
tips.head()
total_bill tip smoker day time size
0 16.99 1.01 No Sun Dinner 2
1 10.34 1.66 No Sun Dinner 3
2 21.01 3.50 No Sun Dinner 3
3 23.68 3.31 No Sun Dinner 2
4 24.59 3.61 No Sun Dinner 4
1
party_counts = pd.crosstab(tips['day'],tips['size'])
1
party_counts
size 1 2 3 4 5 6
day
Fri 1 16 1 1 0 0
Sat 2 53 18 13 1 0
Sun 0 39 15 18 3 1
Thur 1 48 4 5 1 3
1
party_counts = party_counts.loc[:,2:5]
1
party_counts
size 2 3 4 5
day
Fri 16 1 1 0
Sat 53 18 13 1
Sun 39 15 18 3
Thur 48 4 5 1
1
2
# Normalise the table so that every row sums to 1.
# sum(axis=1) adds across the columns, giving one total per row.
row_totals = party_counts.sum(axis=1)
# axis=0 matches the totals against the row index, so each row is divided
# by its own total.
party_pcts = party_counts.div(row_totals, axis=0)
1
party_pcts
size 2 3 4 5
day
Fri 0.888889 0.055556 0.055556 0.000000
Sat 0.623529 0.211765 0.152941 0.011765
Sun 0.520000 0.200000 0.240000 0.040000
Thur 0.827586 0.068966 0.086207 0.017241
1
party_counts.sum(1)
day
Fri     18
Sat     85
Sun     75
Thur    58
dtype: int64
1
party_pcts.plot.bar()
<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1c6433e4f40>
1
import seaborn as sns
1
# Tip as a fraction of (total_bill - tip), not of total_bill itself.
# NOTE(review): this presumably treats total_bill as already including the
# tip — confirm against the dataset description.
tips['tip_pct'] = tips['tip'] / (tips['total_bill']-tips['tip'])
1
tips
total_bill tip smoker day time size tip_pct
0 16.99 1.01 No Sun Dinner 2 0.063204
1 10.34 1.66 No Sun Dinner 3 0.191244
2 21.01 3.50 No Sun Dinner 3 0.199886
3 23.68 3.31 No Sun Dinner 2 0.162494
4 24.59 3.61 No Sun Dinner 4 0.172069
... ... ... ... ... ... ... ...
239 29.03 5.92 No Sat Dinner 3 0.256166
240 27.18 2.00 Yes Sat Dinner 2 0.079428
241 22.67 2.00 Yes Sat Dinner 2 0.096759
242 17.82 1.75 No Sat Dinner 2 0.108899
243 18.78 3.00 No Thur Dinner 2 0.190114

244 rows × 7 columns

1
tips.head()
total_bill tip smoker day time size tip_pct
0 16.99 1.01 No Sun Dinner 2 0.063204
1 10.34 1.66 No Sun Dinner 3 0.191244
2 21.01 3.50 No Sun Dinner 3 0.199886
3 23.68 3.31 No Sun Dinner 2 0.162494
4 24.59 3.61 No Sun Dinner 4 0.172069
1
tips.day.value_counts()
Sat     87
Sun     76
Thur    62
Fri     19
Name: day, dtype: int64
1
sns.barplot(x='tip_pct',y='day',data=tips,orient='h')
<matplotlib.axes._subplots.AxesSubplot at 0x1c6433e4f40>
1
sns.barplot(x='tip_pct',y='day',hue='time',data=tips,orient='h')
<matplotlib.axes._subplots.AxesSubplot at 0x1c6433e4f40>

直方图和密度图

1
tips['tip_pct'].plot.hist(bins=50,grid=True)
<matplotlib.axes._subplots.AxesSubplot at 0x1c6433e4f40>
1
tips['tip_pct'].plot.density()
<matplotlib.axes._subplots.AxesSubplot at 0x1c6433e4f40>
1
2
3
4
# Build a bimodal sample from two normal components: N(0, 1) and N(10, 2).
comp1 = np.random.normal(loc=0, scale=1, size=200)
comp2 = np.random.normal(loc=10, scale=2, size=200)
# Join the two components end to end into a single Series.
values = pd.Series(np.concatenate((comp1, comp2)))
# bins is the number of histogram bars.
# NOTE(review): distplot is deprecated in seaborn >= 0.11. The documented
# replacement is histplot(values, bins=100, color='k', kde=True, stat='density').
# Migrate once the installed seaborn version is confirmed.
sns.distplot(values, color='k', bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0x1c6433e4f40>

散点图或点图

1
2
3
# Load the macro table and keep the four series of interest.
macro = pd.read_csv('examples/macrodata.csv')
macro_columns = ['cpi', 'm1', 'tbilrate', 'unemp']
data = macro[macro_columns]
# Log-differences: take logs, subtract each row's predecessor, and drop the
# first row, which has no predecessor.
log_levels = np.log(data)
trans_data = log_levels.diff().dropna()
1
trans_data.head()
cpi m1 tbilrate unemp
1 0.005849 0.014215 0.088193 -0.128617
2 0.006838 -0.008505 0.215321 0.038466
3 0.000681 -0.003565 0.125317 0.055060
4 0.005772 -0.002861 -0.212805 -0.074108
5 0.000338 0.004289 -0.266946 0.000000
1
2
# Scatter plot of unemp against m1 with a fitted regression line.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally, and the keyword form also works on older releases.
sns.regplot(x='m1', y='unemp', data=trans_data)
plt.title('Change in log %s versus log %s' % ('m1', 'unemp'))
Text(0.5, 1.0, 'Change in log m1 versus log unemp')
1
2
# Pairwise scatter matrix of the transformed series, with a density estimate
# on the diagonal. plot_kws carries options through to the off-diagonal plots.
sns.pairplot(
    trans_data,
    plot_kws=dict(alpha=0.2),
    diag_kind='kde',
)
<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x1c6436d1c40>

分面网格(facet grid)和分类数据(categorical data)

1
2
# Bar chart of tip_pct per day: one panel per smoker value, bars coloured by time.
# Only rows with tip_pct below 1 are passed in.
sns.catplot(
    data=tips[tips['tip_pct'] < 1],
    kind='bar',
    x='day',
    y='tip_pct',
    hue='time',
    col='smoker',
)
<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1c6436d1610>
1
2
# Same bar chart, but time now selects the panel row instead of the bar colour,
# giving one panel for each time / smoker combination.
# Only rows with tip_pct below 1 are passed in.
sns.catplot(
    data=tips[tips['tip_pct'] < 1],
    kind='bar',
    x='day',
    y='tip_pct',
    row='time',
    col='smoker',
)
<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1c6436d1bb0>
1
# Box plot of tip_pct for each day, using only rows with tip_pct below 0.5.
sns.catplot(
    data=tips[tips['tip_pct'] < 0.5],
    kind='box',
    x='tip_pct',
    y='day',
)
<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1c6447a14c0>
1
tips.head()
total_bill tip smoker day time size tip_pct
0 16.99 1.01 No Sun Dinner 2 0.063204
1 10.34 1.66 No Sun Dinner 3 0.191244
2 21.01 3.50 No Sun Dinner 3 0.199886
3 23.68 3.31 No Sun Dinner 2 0.162494
4 24.59 3.61 No Sun Dinner 4 0.172069

0%